"""
    Extract workers' work histories from the resumes provided to Glassdoor.
"""

# Root of the project folder; every other path in this script hangs off it.
fileSeed = "C:/Users/jsock/Dropbox/Research/GD/International"
replicationRoot = fileSeed + '/Replication'

# Two spellings of the helper-function folder (with and without the leading
# and trailing slash); both are added to sys.path further down.
functionPathA = replicationRoot + '/Functions'
functionPathB = "/" + functionPathA + "/"

# Prefix for every CSV written by this script (keeps its trailing slash).
savePath = replicationRoot + "/Data/"

import sys
#sys.path.append('/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages')
sys.path.insert(0, functionPathA)
sys.path.insert(0, functionPathB)
from Function_extract_resumes_workexp import extractResumeWorkExp
from Function_extract_resumes_workexp import extractResumeWorkExpNoBirthYear

import pymssql
import pandas as pd
import yaml
import os
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import csv
import statsmodels.api as sm

# Set up the connection to Gandalf.
# The connection settings are read from the 'gandalf' entry of ~/config.yaml
# and handed to pymssql.connect as keyword arguments.
# yaml.safe_load replaces yaml.load: calling yaml.load without an explicit
# Loader is deprecated since PyYAML 5.1 and raises a TypeError in PyYAML 6.
# The `with` block also closes the config file once it has been parsed.
with open(os.path.expanduser('~/config.yaml')) as configFile:
    config = yaml.safe_load(configFile)
conn = pymssql.connect(**config.get('gandalf'))

#----------------------------------------------------------
# Extract resume work experience one birth-year window at a time
#----------------------------------------------------------

# Window starts run from 1955 through 2005. Each window is handed to the
# extractor as (startYear, startYear + 1) and written to its own CSV file.
for startYear in range(1955, 2006):
    endYear = startYear + 1
    yearRange = str(startYear) + "-" + str(endYear)

    print("Current birthYear range: " + yearRange)

    # NOTE(review): 0.85 is forwarded as the extractor's fourth argument; its
    # meaning is defined in Function_extract_resumes_workexp -- confirm there.
    dfWorkExp = extractResumeWorkExp(conn, startYear, endYear, 0.85)
    print("Data read in.")

    outputFile = savePath + 'Resumes_workexp_birthYear_' + yearRange + '_01_03_2022.csv'
    dfWorkExp.to_csv(outputFile)
    print("Data saved.")

    # Drop the reference to this window's frame before the next one is loaded.
    del dfWorkExp

#----------------------------------------------------------
# Resumes with no birth year: one extraction, saved to its own file
#----------------------------------------------------------

print("Current birthYear range: NA")

# NOTE(review): 0.85 is forwarded as the extractor's second argument; its
# meaning is defined in Function_extract_resumes_workexp -- confirm there.
dfWorkExp = extractResumeWorkExpNoBirthYear(conn, 0.85)
print("Data read in.")

noBirthYearFile = savePath + 'Resumes_workexp_noBirthYear_01_03_2022.csv'
dfWorkExp.to_csv(noBirthYearFile)
print("Data saved.")

# Drop the reference to the frame now that it has been written out.
del dfWorkExp
